{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import numpy as np\n",
    "import os\n",
    "import sys\n",
    "sys.path.append(\"../../../ecg\")\n",
    "\n",
    "import load\n",
    "import util\n",
    "\n",
    "def fleiss_kappa(ratings):\n",
    "    \"\"\"\n",
    "    Compute Fleiss' kappa, a chance-corrected measure of\n",
    "    agreement among a fixed number of reviewers.\n",
    "\n",
    "    Args:\n",
    "        ratings: An N x R numpy array (or array-like). N is\n",
    "            the number of samples and R is the number of\n",
    "            reviewers. Each entry (n, r) is the category\n",
    "            assigned to example n by reviewer r.\n",
    "    Returns:\n",
    "        Fleiss' kappa score. NaN is returned when kappa is\n",
    "        undefined: no samples, fewer than two reviewers, or\n",
    "        every rating falling in a single category (chance\n",
    "        agreement is then 1 and the formula reduces to 0 / 0).\n",
    "    Raises:\n",
    "        ValueError: If ratings is not 2-dimensional.\n",
    "    https://en.wikipedia.org/wiki/Fleiss%27_kappa\n",
    "    \"\"\"\n",
    "    ratings = np.asarray(ratings)\n",
    "    if ratings.ndim != 2:\n",
    "        raise ValueError(\n",
    "            \"ratings must be a 2-D (samples x reviewers) array, \"\n",
    "            \"got {} dimension(s)\".format(ratings.ndim))\n",
    "    N, R = ratings.shape\n",
    "    categories = np.unique(ratings)\n",
    "    # Return NaN explicitly for the undefined cases instead of\n",
    "    # reaching it through 0 / 0 arithmetic and RuntimeWarnings.\n",
    "    if N == 0 or R < 2 or categories.size < 2:\n",
    "        return float(\"nan\")\n",
    "    NR = N * R\n",
    "    # Per example: sum over categories of (votes for category)^2.\n",
    "    sq_votes = np.zeros(N)\n",
    "    # Chance agreement: sum of squared overall category shares.\n",
    "    p_class = 0.0\n",
    "    for c in categories:\n",
    "        c_sum = np.sum(ratings == c, axis=1)\n",
    "        sq_votes += c_sum ** 2\n",
    "        p_class += (np.sum(c_sum) / float(NR)) ** 2\n",
    "    # Mean observed agreement:\n",
    "    #   sum_i (sum_c n_ic^2 - R) / (N * R * (R - 1))\n",
    "    P_example = (np.sum(sq_votes) - NR) / float(NR * (R - 1))\n",
    "    return (P_example - p_class) / (1 - p_class)\n",
    "\n",
    "def average_pairwise_agreement(revs):\n",
    "    \"\"\"\n",
    "    Fraction of pairwise comparisons on which two reviewers\n",
    "    agree, pooled over every unordered pair of reviewers (the\n",
    "    method used in the diabetic retinopathy paper).\n",
    "\n",
    "    Args:\n",
    "        revs: A sequence of numpy arrays, one per reviewer.\n",
    "    Returns:\n",
    "        Number of matching entries summed over all reviewer\n",
    "        pairs, divided by the number of entries compared.\n",
    "    \"\"\"\n",
    "    count = len(revs)\n",
    "    # Every unordered pair (a, b) with a < b.\n",
    "    pairs = [(a, b) for a in range(count)\n",
    "             for b in range(a + 1, count)]\n",
    "    matches = sum(np.sum(revs[a] == revs[b]) for a, b in pairs)\n",
    "    # Each pair contributes the size of its first member.\n",
    "    compared = sum(revs[a].size for a, _ in pairs)\n",
    "    return matches / float(compared)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "model_path = \"/deep/group/awni/ecg_models/default/1527627404-9/0.337-0.880-012-0.255-0.906.hdf5\"\n",
    "# The preprocessor is loaded from the directory holding the model file.\n",
    "preproc = util.load(os.path.dirname(model_path))\n",
    "\n",
    "# One file per reviewer; each line is a JSON object whose 'labels'\n",
    "# entry is kept.\n",
    "reviewer_labels = []\n",
    "for rev_idx in range(6):\n",
    "    rev_file = \"../test_rev{}.json\".format(rev_idx)\n",
    "    with open(rev_file, 'r') as handle:\n",
    "        reviewer_labels.append(\n",
    "            [json.loads(line)['labels'] for line in handle])\n",
    "\n",
    "# argmax over axis 2 of process_y's output gives one array of class\n",
    "# indices per reviewer (presumably process_y one-hot encodes the\n",
    "# labels -- TODO confirm against the preprocessor).\n",
    "revs = [np.argmax(preproc.process_y(labels), axis=2)\n",
    "        for labels in reviewer_labels]\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Sequence Level Agreements"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\t     Fleiss' kappa  \t Avg Pairwise\n",
      "AF           0.613 \t\t 0.904\n",
      "AVB          0.707 \t\t 0.950\n",
      "BIGEMINY     0.796 \t\t 0.989\n",
      "EAR          0.407 \t\t 0.977\n",
      "IVR          0.535 \t\t 0.978\n",
      "JUNCTIONAL   0.610 \t\t 0.956\n",
      "NOISE        0.729 \t\t 0.962\n",
      "SINUS        0.678 \t\t 0.841\n",
      "SVT          0.398 \t\t 0.961\n",
      "TRIGEMINY    0.783 \t\t 0.987\n",
      "VT           0.500 \t\t 0.992\n",
      "WENCKEBACH   0.496 \t\t 0.962\n",
      "\n",
      "All          0.645 \t\t 0.730\n"
     ]
    }
   ],
   "source": [
    "# Row layout shared by the per-class rows and the final \"All\" row.\n",
    "row_fmt = \"{:<10}   {:.3f} \\t\\t {:.3f}\"\n",
    "print(\"\\t     Fleiss' kappa  \\t Avg Pairwise\")\n",
    "for class_idx, class_name in enumerate(preproc.classes):\n",
    "    # Flatten each reviewer's labels and mark the positions that\n",
    "    # carry this class (one-vs-rest agreement).\n",
    "    binary_revs = [np.reshape(r == class_idx, -1) for r in revs]\n",
    "    kappa = fleiss_kappa(np.stack(binary_revs, axis=1))\n",
    "    pairwise = average_pairwise_agreement(binary_revs)\n",
    "    print(row_fmt.format(class_name, kappa, pairwise))\n",
    "print(\"\")\n",
    "# Multi-class agreement: one column of flattened labels per reviewer.\n",
    "ratings = np.hstack([r.reshape(-1, 1) for r in revs])\n",
    "print(row_fmt.format(\n",
    "    \"All\", fleiss_kappa(ratings), average_pairwise_agreement(revs)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Set-level agreements (can only be computed for one rhythm at a time)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\t     Fleiss' kappa  \t Avg Pairwise\n",
      "AF           0.591 \t\t 0.873\n",
      "AVB          0.703 \t\t 0.929\n",
      "BIGEMINY     0.791 \t\t 0.976\n",
      "EAR          0.415 \t\t 0.950\n",
      "IVR          0.645 \t\t 0.944\n",
      "JUNCTIONAL   0.607 \t\t 0.928\n",
      "NOISE        0.625 \t\t 0.924\n",
      "SINUS        0.666 \t\t 0.858\n",
      "SVT          0.485 \t\t 0.921\n",
      "TRIGEMINY    0.732 \t\t 0.971\n",
      "VT           0.677 \t\t 0.967\n",
      "WENCKEBACH   0.609 \t\t 0.947\n"
     ]
    }
   ],
   "source": [
    "row_fmt = \"{:<10}   {:.3f} \\t\\t {:.3f}\"\n",
    "print(\"\\t     Fleiss' kappa  \\t Avg Pairwise\")\n",
    "for class_idx, class_name in enumerate(preproc.classes):\n",
    "    # Per reviewer, one boolean per row of the label array: does\n",
    "    # this class occur anywhere along axis 1?\n",
    "    binary_revs = [np.any(r == class_idx, axis=1) for r in revs]\n",
    "    kappa = fleiss_kappa(np.stack(binary_revs, axis=1))\n",
    "    pairwise = average_pairwise_agreement(binary_revs)\n",
    "    print(row_fmt.format(class_name, kappa, pairwise))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Confusions between \"AFIB\" and \"AFL\" and \"AVB type 2 second degree\" and \"AVB third degree\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
